In [107]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Apply seaborn's default plot styling. color_codes=True remaps matplotlib's
# single-letter colour shorthands (such as 'r' and 'b', used by the plots
# below) to the colours of the seaborn palette.
sns.set(color_codes=True)

In [250]:
# Read the tab-separated passenger in/out table and preview its first rows.
data = pd.read_csv(
    '../data/belorusskoye_in_out_1_min_2016_04.txt',
    sep='\t',
)
data.head()


Out[250]:
station_code station_name month week_day day_type hour minute passengers_in passenger_out
0 2000006 МОСКВА БЕЛОРУССКАЯ апрель понедельник раб 0 0 11 2
1 2000006 МОСКВА БЕЛОРУССКАЯ апрель понедельник раб 0 1 8 0
2 2000006 МОСКВА БЕЛОРУССКАЯ апрель понедельник раб 0 2 7 0
3 2000006 МОСКВА БЕЛОРУССКАЯ апрель понедельник раб 0 3 14 2
4 2000006 МОСКВА БЕЛОРУССКАЯ апрель понедельник раб 0 4 11 2

In [ ]:

Данные за рабочую среду апреля


In [98]:


In [251]:
# The week_day column holds Russian day names (see the data.head() preview),
# so Wednesday rows are selected with "среда"; the former "wed" literal could
# never match a row and always produced an empty frame.
data_wed = data.query('week_day == "среда"')
# Distinct station names, materialised as a list so they can be iterated more
# than once (a bare map() is a one-shot iterator on Python 3). Series.values
# replaces as_matrix(), which newer pandas releases no longer provide.
stations = list(set(data['station_name'].values))

In [253]:
# Two independent zero-filled lists with 1440 slots each.
pass_in = [0] * 1440
pass_out = [0] * 1440

In [254]:
# Show the station names as a single-column table.
pd.DataFrame(data=stations)


Out[254]:
0
0 СЕТУНЬ
1 КУНЦЕВО 1
2 ФИЛИ
3 МОЖАЙСК
4 ТУЧКОВО
5 БЕГОВАЯ
6 БАКОВКА
7 ГОЛИЦЫНО
8 ЖАВОРОНКИ
9 ОТРАДНОЕ СМОЛ.
10 ОДИНЦОВО
11 МОСКВА БЕЛОРУССКАЯ

In [255]:
# Rows of the "МОСКВА БЕЛОРУССКАЯ" station only. The explicit .copy() yields an
# independent frame, so columns added to `belorus` later neither write into
# `data` nor trigger SettingWithCopyWarning.
belorus = data.query('station_name == "МОСКВА БЕЛОРУССКАЯ"').copy()

In [256]:
# Minute-of-day value derived from the hour and minute columns.
belorus['time'] = belorus['hour'] * 60 + belorus['minute']

In [257]:
# Sanity check: length of the smoothed passengers_in series.
# NOTE(review): window() and N are defined in a later cell of this notebook,
# so this cell fails on a fresh kernel unless that cell has been run first.
# Series.values replaces as_matrix(), which newer pandas releases no longer
# provide; transposing a 1-D array was a no-op and is dropped.
window(belorus['passengers_in'].values).shape


Out[257]:
(8630,)

In [258]:
# Sanity check: dropping the first N-1 time values (by position) must leave as
# many points as the smoothed series has.
belorus['time'].iloc[N - 1:].shape


Out[258]:
(8630,)

In [269]:
N = 30  # default moving-average window length, in samples


def window(x, n=None):
    """Return the moving average of the 1-D sequence ``x``.

    Parameters
    ----------
    x : array-like
        One-dimensional sequence of numbers.
    n : int, optional
        Window length in samples. Defaults to the module-level ``N`` (read at
        call time, as before).

    Returns
    -------
    numpy.ndarray
        ``len(x) - n + 1`` averages; element ``i`` is the mean of
        ``x[i:i + n]``. An empty array is returned when ``x`` has fewer than
        ``n`` samples.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n is None:
        n = N
    if n < 1:
        raise ValueError('window length must be at least 1')
    x = np.asarray(x)
    # np.convolve(mode='valid') swaps its operands when the signal is shorter
    # than the kernel and rejects empty input, so a too-short series would
    # yield a meaningless result (or an exception) instead of "no full window".
    if len(x) < n:
        return np.empty(0, dtype=float)
    return np.convolve(x, np.ones((n,)) / n, mode='valid')

# Plot the N-sample moving averages of passengers_in (red) and passenger_out
# (blue). Each average is paired with the time of the last sample in its
# window, hence the [N-1:] offset on the time column.
# Series.values replaces as_matrix(), which newer pandas releases no longer
# provide; transposing a 1-D array was a no-op and is dropped.
plt.plot(belorus['time'][N-1:], window(belorus['passengers_in'].values), 'r-', label='passengers_in')
plt.plot(belorus['time'][N-1:], window(belorus['passenger_out'].values), 'b-', label='passenger_out')
plt.axis([0,1440,0,600])
plt.xlabel('time (60 * hour + minute)')
plt.legend()
plt.show()



In [261]:
def in_out_dict(f):
    """Convert a passenger-count frame into a list of JSON-ready records.

    Parameters
    ----------
    f : pandas.DataFrame
        Must contain the columns 'time', 'passengers_in' and 'passenger_out'.

    Returns
    -------
    list of dict
        One ``{'time': ..., 'in': ..., 'out': ...}`` dict per row of ``f``,
        in row order. An empty frame gives an empty list.
    """
    # Series.tolist() yields native Python scalars, so the records can always
    # be passed to json.dump; iterating with iterrows() was slow and returned
    # numpy scalars for all-numeric frames.
    return [{'time': t, 'in': n_in, 'out': n_out}
            for t, n_in, n_out in zip(f['time'].tolist(),
                                      f['passengers_in'].tolist(),
                                      f['passenger_out'].tolist())]

In [262]:
from json import dumps, dump
import os

Сохранение данных


In [ ]:


In [265]:
# Write one JSON file per (station, week day, day type) combination that
# actually occurs in `data`.
path = '../data/stations_in_out/'
if not os.path.exists(path):
    os.makedirs(path)

# A single groupby pass replaces one full-frame query per combination; it only
# yields non-empty groups, keeps the original row order inside each group, and
# does not depend on `stations` still being iterable.
grouped = data.assign(time=60 * data['hour'] + data['minute']).groupby(
    ['station_name', 'week_day', 'day_type'])
for (station, week_day, day_type), station_data in grouped:
    # Build the records once and reuse them for the dump.
    result = in_out_dict(station_data)
    with open(path + 'data_{0}_{1}_{2}.json'.format(station, week_day, day_type), 'w') as f:
        dump(result, f)

In [277]:
# Draw the smoothed passengers_in (red) / passenger_out (blue) curves for every
# (station, week day, day type) combination that occurs in `data`.
for (station, week_day, day_type), station_data in data.groupby(
        ['station_name', 'week_day', 'day_type']):
    # With fewer than N samples there is no complete averaging window to plot.
    if len(station_data) < N:
        continue
    station_data = station_data.assign(
        time=60 * station_data['hour'] + station_data['minute'])
    # Each average is paired with the time of the last sample in its window.
    # Series.values replaces as_matrix(), which newer pandas no longer provides.
    plt.plot(station_data['time'][N-1:], window(station_data['passengers_in'].values), 'r-')
    plt.plot(station_data['time'][N-1:], window(station_data['passenger_out'].values), 'b-')
    title = 'station_name == "{0}" and week_day == "{1}" and day_type == "{2}"'.format(station, week_day, day_type)
    # Python 2 produces a byte string here and matplotlib needs text; on
    # Python 3 the title is already text and has no .decode().
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    print(title)
    plt.title(title)
    plt.show()


station_name == "СЕТУНЬ" and week_day == "суббота" and day_type == "вых"
station_name == "СЕТУНЬ" and week_day == "воскресенье" and day_type == "вых"
station_name == "СЕТУНЬ" and week_day == "вторник" and day_type == "раб"
station_name == "СЕТУНЬ" and week_day == "четверг" and day_type == "раб"
station_name == "СЕТУНЬ" and week_day == "среда" and day_type == "раб"
station_name == "СЕТУНЬ" and week_day == "пятница" and day_type == "раб"
station_name == "СЕТУНЬ" and week_day == "понедельник" and day_type == "раб"
station_name == "КУНЦЕВО 1" and week_day == "суббота" and day_type == "вых"
station_name == "КУНЦЕВО 1" and week_day == "воскресенье" and day_type == "вых"
station_name == "КУНЦЕВО 1" and week_day == "вторник" and day_type == "раб"
station_name == "КУНЦЕВО 1" and week_day == "четверг" and day_type == "раб"
station_name == "КУНЦЕВО 1" and week_day == "среда" and day_type == "раб"
station_name == "КУНЦЕВО 1" and week_day == "пятница" and day_type == "раб"
station_name == "КУНЦЕВО 1" and week_day == "понедельник" and day_type == "раб"
station_name == "ФИЛИ" and week_day == "суббота" and day_type == "вых"
station_name == "ФИЛИ" and week_day == "воскресенье" and day_type == "вых"
station_name == "ФИЛИ" and week_day == "вторник" and day_type == "раб"
station_name == "ФИЛИ" and week_day == "четверг" and day_type == "раб"
station_name == "ФИЛИ" and week_day == "среда" and day_type == "раб"
station_name == "ФИЛИ" and week_day == "пятница" and day_type == "раб"
station_name == "ФИЛИ" and week_day == "понедельник" and day_type == "раб"
station_name == "МОЖАЙСК" and week_day == "суббота" and day_type == "вых"
station_name == "МОЖАЙСК" and week_day == "воскресенье" and day_type == "вых"
station_name == "МОЖАЙСК" and week_day == "вторник" and day_type == "раб"
station_name == "МОЖАЙСК" and week_day == "четверг" and day_type == "раб"
station_name == "МОЖАЙСК" and week_day == "среда" and day_type == "раб"
station_name == "МОЖАЙСК" and week_day == "пятница" and day_type == "раб"
station_name == "МОЖАЙСК" and week_day == "понедельник" and day_type == "раб"
station_name == "ТУЧКОВО" and week_day == "суббота" and day_type == "вых"
station_name == "ТУЧКОВО" and week_day == "воскресенье" and day_type == "вых"
station_name == "ТУЧКОВО" and week_day == "вторник" and day_type == "раб"
station_name == "ТУЧКОВО" and week_day == "четверг" and day_type == "раб"
station_name == "ТУЧКОВО" and week_day == "среда" and day_type == "раб"
station_name == "ТУЧКОВО" and week_day == "пятница" and day_type == "раб"
station_name == "ТУЧКОВО" and week_day == "понедельник" and day_type == "раб"
station_name == "БЕГОВАЯ" and week_day == "суббота" and day_type == "вых"
station_name == "БЕГОВАЯ" and week_day == "воскресенье" and day_type == "вых"
station_name == "БЕГОВАЯ" and week_day == "вторник" and day_type == "раб"
station_name == "БЕГОВАЯ" and week_day == "четверг" and day_type == "раб"
station_name == "БЕГОВАЯ" and week_day == "среда" and day_type == "раб"
station_name == "БЕГОВАЯ" and week_day == "пятница" and day_type == "раб"
station_name == "БЕГОВАЯ" and week_day == "понедельник" and day_type == "раб"
station_name == "БАКОВКА" and week_day == "суббота" and day_type == "вых"
station_name == "БАКОВКА" and week_day == "воскресенье" and day_type == "вых"
station_name == "БАКОВКА" and week_day == "вторник" and day_type == "раб"
station_name == "БАКОВКА" and week_day == "четверг" and day_type == "раб"
station_name == "БАКОВКА" and week_day == "среда" and day_type == "раб"
station_name == "БАКОВКА" and week_day == "пятница" and day_type == "раб"
station_name == "БАКОВКА" and week_day == "понедельник" and day_type == "раб"
station_name == "ГОЛИЦЫНО" and week_day == "суббота" and day_type == "вых"
station_name == "ГОЛИЦЫНО" and week_day == "воскресенье" and day_type == "вых"
station_name == "ГОЛИЦЫНО" and week_day == "вторник" and day_type == "раб"
station_name == "ГОЛИЦЫНО" and week_day == "четверг" and day_type == "раб"
station_name == "ГОЛИЦЫНО" and week_day == "среда" and day_type == "раб"
station_name == "ГОЛИЦЫНО" and week_day == "пятница" and day_type == "раб"
station_name == "ГОЛИЦЫНО" and week_day == "понедельник" and day_type == "раб"
station_name == "ЖАВОРОНКИ" and week_day == "суббота" and day_type == "вых"
station_name == "ЖАВОРОНКИ" and week_day == "воскресенье" and day_type == "вых"
station_name == "ЖАВОРОНКИ" and week_day == "вторник" and day_type == "раб"
station_name == "ЖАВОРОНКИ" and week_day == "четверг" and day_type == "раб"
station_name == "ЖАВОРОНКИ" and week_day == "среда" and day_type == "раб"
station_name == "ЖАВОРОНКИ" and week_day == "пятница" and day_type == "раб"
station_name == "ЖАВОРОНКИ" and week_day == "понедельник" and day_type == "раб"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "суббота" and day_type == "вых"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "воскресенье" and day_type == "вых"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "вторник" and day_type == "раб"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "четверг" and day_type == "раб"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "среда" and day_type == "раб"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "пятница" and day_type == "раб"
station_name == "ОТРАДНОЕ СМОЛ." and week_day == "понедельник" and day_type == "раб"
station_name == "ОДИНЦОВО" and week_day == "суббота" and day_type == "вых"
station_name == "ОДИНЦОВО" and week_day == "воскресенье" and day_type == "вых"
station_name == "ОДИНЦОВО" and week_day == "вторник" and day_type == "раб"
station_name == "ОДИНЦОВО" and week_day == "четверг" and day_type == "раб"
station_name == "ОДИНЦОВО" and week_day == "среда" and day_type == "раб"
station_name == "ОДИНЦОВО" and week_day == "пятница" and day_type == "раб"
station_name == "ОДИНЦОВО" and week_day == "понедельник" and day_type == "раб"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "суббота" and day_type == "вых"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "воскресенье" and day_type == "вых"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "вторник" and day_type == "раб"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "четверг" and day_type == "раб"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "среда" and day_type == "раб"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "пятница" and day_type == "раб"
station_name == "МОСКВА БЕЛОРУССКАЯ" and week_day == "понедельник" and day_type == "раб"

In [ ]:


In [ ]: